#################################################################################
# Replication file for: "Accounting for Noncompliance in Survey Experiments"    #
#                                                                               #
# Jeffrey J. Harden                                                             #
# University of Notre Dame                                                      #
# jeff.harden@nd.edu                                                            #
#                                                                               #
# Anand E. Sokhey                                                               #
# University of Colorado Boulder                                                #
# anand.sokhey@colorado.edu                                                     #
#                                                                               #
# Katherine L. Runge                                                            #
# University of Colorado Boulder                                                #
# katherine.runge@colorado.edu                                                  # 
#                                                                               #
# Replication of Harden (2016)                                                  #
# Last update: August 2, 2018                                                   #
#################################################################################
### Packages ###
library(foreign)
library(AER)
library(boot)
library(MASS)
library(ggplot2)

set.seed(52433)

### Data ###
qualtrics <- read.dta("qualtrics.dta")
qualtrics <- qualtrics[qualtrics$income < 15 & !is.na(qualtrics$income) & 
             !is.na(qualtrics$edufundtime3) & 
             !is.na(qualtrics$edufundtherm) &
             !is.na(qualtrics$attention) &
             !is.na(qualtrics$party) &
             !is.na(qualtrics$ideology) &             
             !is.na(qualtrics$female) &
             !is.na(qualtrics$race) &
             !is.na(qualtrics$marital) &
             !is.na(qualtrics$children) &
             !is.na(qualtrics$citizen) &
             !is.na(qualtrics$education) &
             !is.na(qualtrics$home) &
             !is.na(qualtrics$employ), ]
qualtrics$children2 <- ifelse(qualtrics$children <= 3, qualtrics$children, 4)
  
## Experimental conditions ##
qualtrics$fairshare <- ifelse(qualtrics$edufundtreat == "fairshare", 1, 0)
qualtrics$porkbarrel <- ifelse(qualtrics$edufundtreat == "porkbarrel", 1, 0)

### Compliance ###
## Page Latency time -- time between clicks on either side of the vignette ##
qualtrics$latencytime <- qualtrics$edufundtime3
qualtrics$dt.group[qualtrics$latencytime <= 15] <- "< 15 seconds"
qualtrics$dt.group[qualtrics$latencytime > 15 & qualtrics$latencytime <= 25] <- "15-25 seconds"
qualtrics$dt.group[qualtrics$latencytime > 25 & qualtrics$latencytime <= 35] <- "25-35 seconds"
qualtrics$dt.group[qualtrics$latencytime > 35 & qualtrics$latencytime <= 45] <- "35-45 seconds"
qualtrics$dt.group[qualtrics$latencytime > 45] <- "> 45 seconds"
qualtrics$dt.group <- factor(qualtrics$dt.group, levels = c("< 15 seconds", "15-25 seconds", "25-35 seconds", "35-45 seconds", "> 45 seconds"))

# Graph latency time
# Figure A2
pdf("figureA2.pdf")

ggplot(qualtrics, aes(x = latencytime, fill = dt.group)) + 
geom_histogram(binwidth = 3, alpha = .7) + 
scale_fill_brewer(palette = "Set1") +
scale_y_continuous(limits = c(0, 200), breaks = seq(0, 200, 20)) +
scale_x_continuous(limits = c(0, 250), breaks = seq(0, 250, 25)) +
theme(legend.position = c(1, 1), legend.justification = c(1, 1), legend.text = element_text(size = 12), legend.title = element_blank(), axis.text = element_text(size = 15), axis.title.y = element_text(size = 20, vjust = 2), axis.title.x = element_text(size = 20, vjust = -.5)) + ylab("Frequency") + xlab("Latency time (seconds)")

dev.off()

# Compliance indicators: Is the submit time > {15, 25, 35, 45} seconds?
qualtrics$comply15 <- ifelse(qualtrics$latencytime > 15, 1, 0)
qualtrics$comply25 <- ifelse(qualtrics$latencytime > 25, 1, 0)
qualtrics$comply35 <- ifelse(qualtrics$latencytime > 35, 1, 0)
qualtrics$comply45 <- ifelse(qualtrics$latencytime > 45, 1, 0)

qualtrics$fs.comply15 <- ifelse(qualtrics$fairshare == 1 & qualtrics$comply15 == 1, 1, 0) 
qualtrics$pb.comply15 <- ifelse(qualtrics$porkbarrel == 1 & qualtrics$comply15 == 1, 1, 0) 
qualtrics$fs.comply25 <- ifelse(qualtrics$fairshare == 1 & qualtrics$comply25 == 1, 1, 0) 
qualtrics$pb.comply25 <- ifelse(qualtrics$porkbarrel == 1 & qualtrics$comply25 == 1, 1, 0) 
qualtrics$fs.comply35 <- ifelse(qualtrics$fairshare == 1 & qualtrics$comply35 == 1, 1, 0) 
qualtrics$pb.comply35 <- ifelse(qualtrics$porkbarrel == 1 & qualtrics$comply35 == 1, 1, 0) 
qualtrics$fs.comply45 <- ifelse(qualtrics$fairshare == 1 & qualtrics$comply45 == 1, 1, 0) 
qualtrics$pb.comply45 <- ifelse(qualtrics$porkbarrel == 1 & qualtrics$comply45 == 1, 1, 0) 

### Treatment effects ###
## ITT -- regress y on randomized treatment assignment ##
itt <- lm(edufundtherm ~ porkbarrel, data = qualtrics)

## As-treated -- regress y on treatment received (Imbens and Rubin 2015, 535) ##
# Assumption: Pork barrel compliers received treatment, all others untreated
as.treat15 <- lm(edufundtherm ~ pb.comply15, data = qualtrics)
as.treat25 <- lm(edufundtherm ~ pb.comply25, data = qualtrics)
as.treat35 <- lm(edufundtherm ~ pb.comply35, data = qualtrics)
as.treat45 <- lm(edufundtherm ~ pb.comply45, data = qualtrics)

## Per-protocol -- regress y on treatment received among treatment compliers and those assigned to control (Imbens and Rubin 2015, 537) ##
# Note: Noncompliers in treatment group dropped; untreated noncompliers remain in sample
per.pro15 <- lm(edufundtherm ~ pb.comply15, data = subset(qualtrics, pb.comply15 == 1 | porkbarrel == 0)) 
per.pro25 <- lm(edufundtherm ~ pb.comply15, data = subset(qualtrics, pb.comply25 == 1 | porkbarrel == 0))  
per.pro35 <- lm(edufundtherm ~ pb.comply15, data = subset(qualtrics, pb.comply35 == 1 | porkbarrel == 0))  
per.pro45 <- lm(edufundtherm ~ pb.comply15, data = subset(qualtrics, pb.comply45 == 1 | porkbarrel == 0)) 

## CACE -- Use randomization as instrument for complier indicator ##
cace15 <- ivreg(edufundtherm ~ pb.comply15 | porkbarrel, data = qualtrics)
cace25 <- ivreg(edufundtherm ~ pb.comply25 | porkbarrel, data = qualtrics)
cace35 <- ivreg(edufundtherm ~ pb.comply35 | porkbarrel, data = qualtrics)
cace45 <- ivreg(edufundtherm ~ pb.comply45 | porkbarrel, data = qualtrics)

### Graph results ###
d.pb <- data.frame(
  pe = 
    c(coef(itt)[2], 
      coef(as.treat15)[2], coef(as.treat25)[2], coef(as.treat35)[2], coef(as.treat45)[2],
      coef(per.pro15)[2], coef(per.pro25)[2], coef(per.pro35)[2], coef(per.pro45)[2],
      coef(cace15)[2], coef(cace25)[2], coef(cace35)[2], coef(cace45)[2]),
  lo = 
    c(confint(itt)[2, 1], 
      confint(as.treat15)[2, 1], confint(as.treat25)[2, 1], confint(as.treat35)[2, 1], confint(as.treat45)[2, 1],
      confint(per.pro15)[2, 1], confint(per.pro25)[2, 1], confint(per.pro35)[2, 1], confint(per.pro45)[2, 1],
      confint(cace15)[2, 1], confint(cace25)[2, 1], confint(cace35)[2, 1], confint(cace45)[2, 1]),
  hi = 
    c(confint(itt)[2, 2], 
      confint(as.treat15)[2, 2], confint(as.treat25)[2, 2], confint(as.treat35)[2, 2], confint(as.treat45)[2, 2],
      confint(per.pro15)[2, 2], confint(per.pro25)[2, 2], confint(per.pro35)[2, 2], confint(per.pro45)[2, 2],
      confint(cace15)[2, 2], confint(cace25)[2, 2], confint(cace35)[2, 2], confint(cace45)[2, 2]),
  Estimand = c("ITT", rep(c("As-treated", "Per-protocol", "CACE"), each = 4)),
  Cutoff = c("None", rep(c("15 (74% compliers)", "25 (62% compliers)", "35 (44% compliers)", "45 (28% compliers)"), times = 3))
)

# Proportion compliers
mean(qualtrics$comply15)
mean(qualtrics$comply25)
mean(qualtrics$comply35)
mean(qualtrics$comply45)

d.pb2 <- as.data.frame(apply(d.pb[ , 1:3], 2, rev))
d.pb2$Estimand <- rev(d.pb$Estimand)
d.pb2$Estimand = factor(d.pb2$Estimand, levels = c("CACE", "Per-protocol", "As-treated", "ITT"))
d.pb2$Cutoff <- rev(d.pb$Cutoff)
d.pb2$Cutoff = factor(d.pb2$Cutoff, levels = c("None", "15 (74% compliers)", "25 (62% compliers)", "35 (44% compliers)", "45 (28% compliers)"))
d.pb2$y <- c(.8, .9, 1.1, 1.2, 1.8, 1.9, 2.1, 2.2, 2.8, 2.9, 3.1, 3.2, 4)

# Figure A1
pdf("figureA1.pdf", width = 10)

ggplot(d.pb2, aes(y = y, x = pe, color = Cutoff)) +
  scale_color_brewer(name = "Latency time cutoff (seconds)", palette = "Set1") +
  geom_point(lwd = 3.75) +
  geom_segment(aes(y = y, yend = y, x = lo, xend = hi), lwd = 1) +
  geom_vline(xintercept = 0) +
  scale_y_continuous(breaks = 1:4, labels = c("CACE", "Per-protocol", "As-treated", "ITT")) +
  scale_x_continuous(limits = c(-28, 8), breaks = seq(-28, 8, 4)) +
  xlab("Treatment effect") + ylab("Estimand") +
  theme(legend.position = "bottom", legend.text = element_text(size = 16), axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 20, vjust = 2), axis.title.x = element_text(size = 20, vjust = -.5), axis.text.x = element_text(size = 12), legend.title = element_text(size = 18)) + guides(color = guide_legend(ncol = 2, title.hjust = .5, title.size = 15))

dev.off()

save.image("harden-replication.RData")
